import ast
import base64
import datetime
import json
import logging
import gradio as gr
import  sys
import time
import traceback

import torch

from common_utils.utils4infer import get_feat_from_wav_path, load_model_and_tokenizer, token_list2wav

sys.path.insert(0, '.')
sys.path.insert(0, './tts')
sys.path.insert(0, './tts/third_party/Matcha-TTS')
from patches import modelling_qwen2_infer_gpu  # 打patch
from tts.cosyvoice.cli.cosyvoice import CosyVoice
from tts.cosyvoice.utils.file_utils import load_wav

# Try to import the optional torch_npu package and tell the user when it is missing.
# NOTE(review): is_npu stays False even when torch_npu imports successfully —
# confirm whether the flag is meant to become True in that case.
is_npu = False
try:
    import torch_npu  # noqa: F401  (imported only to check availability)
except ImportError:
    print("torch_npu is not available. if you want to use npu, please install it.")



# Index of the CUDA device used for inference.
gpu_id = 0
device = torch.device(f"cuda:{gpu_id}")

# Hard-coded checkpoint directory handed to CosyVoice; the model is constructed
# once at import time on the GPU selected above.
cosyvoice_model_path = "/home/work_nfs9/asr_data/ckpt/CosyVoice-300M-25Hz"
cosyvoice = CosyVoice(cosyvoice_model_path, gpu_id=gpu_id)

# Available prompt audio clips: "name" is a label, "value" is the wav file path.
prompt_audio_choices = [
    {
        "name": "拟人",
        "value": "./tts/assert/prompt.wav",
    },
]

# Load every prompt wav once up front, keyed by its file path.
# 22050 is forwarded as load_wav's second argument
# (presumably the target sample rate — confirm against load_wav).
prompt_audio_cache = {
    choice["value"]: load_wav(choice["value"], 22050)
    for choice in prompt_audio_choices
}


# Hard-coded sequence of integer token ids that is passed to token_list2wav below.
# NOTE(review): presumably speech tokens produced by an earlier model run — confirm the source.
token_list = [1252, 1915, 1406, 2209, 701, 961, 2571, 1504, 3898, 3898, 53, 3649, 2978, 624, 2844, 6, 1847, 1755, 1147, 1243, 2583, 2583, 2583, 59, 46, 2203, 186, 21, 3898, 3062, 3062, 53, 3649, 373, 1055, 368, 3106, 2582, 2504, 1847, 568, 890, 3775, 2323, 1420, 2172, 3844, 368, 87, 59, 1847, 984, 540, 1260, 773, 101, 43, 1868, 2172, 3106, 2680, 124, 1615, 6, 1243, 3278, 890, 3818, 1313, 2247, 1116, 59, 349, 2537, 351, 1404, 3305, 1734, 1504, 1504, 1504, 1504, 1504, 2630, 3898, 1027, 1879, 10, 380, 380, 1796, 3612, 2504, 3062, 927, 2504, 1785, 3910, 1842, 848, 54, 3347, 1906, 4082, 1954, 1694, 73, 1317, 124, 1406, 20, 103, 3274, 44, 302, 53, 636, 1734, 1504, 21, 21, 53, 53, 53, 1577, 3478, 1579, 2306, 386, 1147, 1868, 2172, 858, 2250, 3635, 1785, 997, 1223, 3106, 2664, 822, 2058, 2399, 234, 646, 2554, 1193, 1318, 1176, 1650, 368, 4082, 101, 2409, 2884, 2409, 101, 3340, 455, 3929, 1734, 3898, 3062, 3649, 3649, 1736, 501, 59, 2368, 773, 1570, 2037, 1281, 2243, 2858, 3649, 6, 927, 1002, 1002, 2247, 1404, 1600, 773, 3106, 3179, 2368, 1716, 3554, 1854, 2203, 3106, 1018, 962, 2691, 375, 3761, 1252, 2666, 1609, 1821, 3818, 714, 1385, 60, 1516, 2844, 1854, 3844, 754, 1698, 1342, 1577, 31, 3898, 1027, 2809, 4006, 2, 3437, 2610, 347, 1454, 2285, 1256, 1602, 3437, 3062, 28, 3684, 2714, 2932, 386, 1223, 1044, 44, 3554, 271, 1716, 3179, 2099, 1602, 568, 699, 3741, 2323, 1243, 2172, 3844, 1660, 1660, 2583, 2151, 1734, 3898, 53, 53, 2050, 700, 1027, 1185, 271, 2786, 1044, 1501, 1291, 2099, 2569, 3600, 2172, 501, 1292, 119, 570, 2426, 2597, 1117, 1504, 1083, 1868, 2164, 1289, 2050, 1240, 2, 2786, 209, 2714, 358, 1030, 2250, 347, 3265, 540, 3179, 54, 1712, 62, 2000, 1712, 510, 380, 1734, 3898, 3649, 1736, 1653, 1040, 3612, 3612, 2664, 1868, 60, 2537, 1618, 3492, 48, 1312, 1611, 569, 28, 2657, 3635, 343, 1602, 2907, 3600, 1073, 569, 3775, 1333, 3554, 61, 122, 781, 84, 740, 3582, 58, 629, 2569, 890, 421, 3929, 186, 2844, 27]
# Run the token-to-wav helper on the hard-coded tokens, using the cached prompt
# audio and the CosyVoice model loaded above.
# NOTE(review): presumably the result is written to output_wav_path — confirm in token_list2wav.
output_wav_path = "./xlgeng.wav"
prompt_wav = prompt_audio_cache['./tts/assert/prompt.wav']
token_list2wav(token_list, prompt_wav, output_wav_path, cosyvoice)